{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# `cumulative_comparisons_to_be_scored_from_blocking_rules_chart`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "tags": [
     "hide_input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-cdcf2afc9afa4faaa7d123d1fe8f7bdd.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-cdcf2afc9afa4faaa7d123d1fe8f7bdd.vega-embed details,\n",
       "  #altair-viz-cdcf2afc9afa4faaa7d123d1fe8f7bdd.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-cdcf2afc9afa4faaa7d123d1fe8f7bdd\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-cdcf2afc9afa4faaa7d123d1fe8f7bdd\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-cdcf2afc9afa4faaa7d123d1fe8f7bdd\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-75d04db0f4c0716967a977404e6a2d09\"}, \"mark\": \"bar\", \"encoding\": {\"order\": {\"field\": \"cumulative_rows\"}, \"tooltip\": [{\"field\": \"blocking_rule\", \"title\": \"SQL Condition\", \"type\": \"nominal\"}, {\"field\": \"row_count\", \"format\": \",\", \"title\": \"Comparisons Generated\", \"type\": \"quantitative\"}, {\"field\": \"cumulative_rows\", \"format\": \",\", \"title\": \"Cumulative Comparisons\", \"type\": \"quantitative\"}, {\"field\": \"cartesian\", \"format\": \",\", \"title\": \"Total comparisons in Cartesian product\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"start\", \"title\": \"Comparisons Generated by Rule(s)\", \"type\": \"quantitative\"}, \"x2\": {\"field\": \"cumulative_rows\"}, \"y\": {\"field\": \"blocking_rule\", \"sort\": [\"-x2\"], \"title\": \"SQL Blocking Rule\"}}, \"height\": {\"step\": 20}, \"title\": {\"text\": \"Count of Additional Comparisons Generated by Each Blocking Rule\", \"subtitle\": \"(Counts exclude comparisons already generated by previous rules)\"}, \"width\": 450, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\", \"datasets\": {\"data-75d04db0f4c0716967a977404e6a2d09\": [{\"blocking_rule\": \"l.\\\"first_name\\\" = r.\\\"first_name\\\"\", \"row_count\": 1998, \"cumulative_rows\": 1998, \"cartesian\": 499500, \"match_key\": \"0\", \"start\": 0}, {\"blocking_rule\": \"l.\\\"surname\\\" = r.\\\"surname\\\"\", \"row_count\": 1351, \"cumulative_rows\": 3349, \"cartesian\": 499500, \"match_key\": \"1\", \"start\": 1998}, {\"blocking_rule\": \"l.\\\"email\\\" = r.\\\"email\\\"\", \"row_count\": 315, \"cumulative_rows\": 3664, \"cartesian\": 499500, \"match_key\": \"2\", \"start\": 3349}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chart"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "!!! info \"At a glance\"\n",
    "    **Useful for:** Counting the number of comparisons generated by Blocking Rules.\n",
    "\n",
    "    **API Documentation:** [cumulative_comparisons_to_be_scored_from_blocking_rules_chart()](../api_docs/blocking_analysis.md#splink.blocking_analysis.cumulative_comparisons_to_be_scored_from_blocking_rules_chart)\n",
    "\n",
    "    **What is needed to generate the chart?** A `linker` with some data and a settings dictionary defining some Blocking Rules.\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What the chart shows\n",
    "\n",
    "The `cumulative_comparisons_to_be_scored_from_blocking_rules_chart` shows the count of pairwise comparisons generated by a set of blocking rules.\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "??? note \"What the chart tooltip shows\"\n",
    "\n",
    "    ![](./img/cumulative_num_comparisons_from_blocking_rules_chart_tooltip.png)\n",
    "\n",
    "    The tooltip shows a number of statistics based on the bar that the user is hovering over, including:\n",
    "\n",
    "    - The blocking rule as an SQL statement.\n",
    "    - The number of additional pairwise comparisons generated by the blocking rule.\n",
    "    - The cumulative number of pairwise comparisons generated by the blocking rule and the previous blocking rules.\n",
    "    - The total number of possible pariwise comparisons (i.e. the Cartesian product). This represents the number of comparisons which would need to be evaluated if no blocking was implemented.\n",
    "    - The percentage of possible pairwise comparisons excluded by the blocking rule and the previous blocking rules (i.e. the Reduction Ratio). This is calculated as $1-\\frac{\\textsf{cumulative comparisons}}{\\textsf{total possible comparisons}}$.\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to interpret the chart\n",
    "\n",
    "Blocking rules are order dependent, therefore each bar in this chart shows the **additional** comparisons generated ontop of the previous blocking rules.\n",
    "\n",
    "For example, the chart above shows an exact match on `surname` generates an additional 1351 comparisons. If we reverse the order of the `surname` and `first_name` blocking rules:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-14f55ae2d0334fdfaea5943f9760ee05.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-14f55ae2d0334fdfaea5943f9760ee05.vega-embed details,\n",
       "  #altair-viz-14f55ae2d0334fdfaea5943f9760ee05.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-14f55ae2d0334fdfaea5943f9760ee05\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-14f55ae2d0334fdfaea5943f9760ee05\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-14f55ae2d0334fdfaea5943f9760ee05\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-e35d4066317d9d2ed0a7880a0a3727e8\"}, \"mark\": \"bar\", \"encoding\": {\"order\": {\"field\": \"cumulative_rows\"}, \"tooltip\": [{\"field\": \"blocking_rule\", \"title\": \"SQL Condition\", \"type\": \"nominal\"}, {\"field\": \"row_count\", \"format\": \",\", \"title\": \"Comparisons Generated\", \"type\": \"quantitative\"}, {\"field\": \"cumulative_rows\", \"format\": \",\", \"title\": \"Cumulative Comparisons\", \"type\": \"quantitative\"}, {\"field\": \"cartesian\", \"format\": \",\", \"title\": \"Total comparisons in Cartesian product\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"start\", \"title\": \"Comparisons Generated by Rule(s)\", \"type\": \"quantitative\"}, \"x2\": {\"field\": \"cumulative_rows\"}, \"y\": {\"field\": \"blocking_rule\", \"sort\": [\"-x2\"], \"title\": \"SQL Blocking Rule\"}}, \"height\": {\"step\": 20}, \"title\": {\"text\": \"Count of Additional Comparisons Generated by Each Blocking Rule\", \"subtitle\": \"(Counts exclude comparisons already generated by previous rules)\"}, \"width\": 450, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\", \"datasets\": {\"data-e35d4066317d9d2ed0a7880a0a3727e8\": [{\"blocking_rule\": \"l.\\\"surname\\\" = r.\\\"surname\\\"\", \"row_count\": 1638, \"cumulative_rows\": 1638, \"cartesian\": 499500, \"match_key\": \"0\", \"start\": 0}, {\"blocking_rule\": \"l.\\\"first_name\\\" = r.\\\"first_name\\\"\", \"row_count\": 1711, \"cumulative_rows\": 3349, \"cartesian\": 499500, \"match_key\": \"1\", \"start\": 1638}, {\"blocking_rule\": \"l.\\\"email\\\" = r.\\\"email\\\"\", \"row_count\": 315, \"cumulative_rows\": 3664, \"cartesian\": 499500, \"match_key\": \"2\", \"start\": 3349}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "blocking_rules_for_analysis = [\n",
    "    block_on(\"surname\"),\n",
    "    block_on(\"first_name\"),\n",
    "    block_on(\"email\"),\n",
    "]\n",
    "\n",
    "cumulative_comparisons_to_be_scored_from_blocking_rules_chart(\n",
    "    table_or_tables=df,\n",
    "    blocking_rules=blocking_rules_for_analysis,\n",
    "    db_api=db_api,\n",
    "    link_type=\"dedupe_only\",\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The total number of comparisons is the same (3,664), but now 1,638 have been generated by the `surname` blocking rule. This suggests that 287 record comparisons have the same `first_name` and `surname`.\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Actions to take as a result of the chart\n",
    "\n",
    "The main aim of this chart is to understand how many comparisons are generated by blocking rules that the Splink model will consider. The number of comparisons is the main primary driver of the amount of computational resource required for Splink model training, predictions etc. (i.e. how long things will take to run).\n",
    "\n",
    "The number of comparisons that are appropriate for a model varies. In general, if a model is taking hours to run (unless you are working with 100+ million records), it could be helpful to reduce the number of comparisons by defining more restrictive blocking rules.\n",
    "\n",
    "For instance, there are many people who could share the same `first_name` in the example above you may want to add an additonal requirement for a match on `dob` as well to reduce the number of records the model needs to consider.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-c5ae604108c04eddb87f9e8618f57feb.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-c5ae604108c04eddb87f9e8618f57feb.vega-embed details,\n",
       "  #altair-viz-c5ae604108c04eddb87f9e8618f57feb.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-c5ae604108c04eddb87f9e8618f57feb\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-c5ae604108c04eddb87f9e8618f57feb\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-c5ae604108c04eddb87f9e8618f57feb\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-ebda9afe61e5689499001fc9f76d6aea\"}, \"mark\": \"bar\", \"encoding\": {\"order\": {\"field\": \"cumulative_rows\"}, \"tooltip\": [{\"field\": \"blocking_rule\", \"title\": \"SQL Condition\", \"type\": \"nominal\"}, {\"field\": \"row_count\", \"format\": \",\", \"title\": \"Comparisons Generated\", \"type\": \"quantitative\"}, {\"field\": \"cumulative_rows\", \"format\": \",\", \"title\": \"Cumulative Comparisons\", \"type\": \"quantitative\"}, {\"field\": \"cartesian\", \"format\": \",\", \"title\": \"Total comparisons in Cartesian product\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"start\", \"title\": \"Comparisons Generated by Rule(s)\", \"type\": \"quantitative\"}, \"x2\": {\"field\": \"cumulative_rows\"}, \"y\": {\"field\": \"blocking_rule\", \"sort\": [\"-x2\"], \"title\": \"SQL Blocking Rule\"}}, \"height\": {\"step\": 20}, \"title\": {\"text\": \"Count of Additional Comparisons Generated by Each Blocking Rule\", \"subtitle\": \"(Counts exclude comparisons already generated by previous rules)\"}, \"width\": 450, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\", \"datasets\": {\"data-ebda9afe61e5689499001fc9f76d6aea\": [{\"blocking_rule\": \"(l.\\\"first_name\\\" = r.\\\"first_name\\\") AND (l.\\\"dob\\\" = r.\\\"dob\\\")\", \"row_count\": 272, \"cumulative_rows\": 272, \"cartesian\": 499500, \"match_key\": \"0\", \"start\": 0}, {\"blocking_rule\": \"l.\\\"surname\\\" = r.\\\"surname\\\"\", \"row_count\": 1528, \"cumulative_rows\": 1800, \"cartesian\": 499500, \"match_key\": \"1\", \"start\": 272}, {\"blocking_rule\": \"l.\\\"email\\\" = r.\\\"email\\\"\", \"row_count\": 413, \"cumulative_rows\": 2213, \"cartesian\": 499500, \"match_key\": \"2\", \"start\": 1800}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "blocking_rules_for_analysis = [\n",
    "    block_on(\"first_name\", \"dob\"),\n",
    "    block_on(\"surname\"),\n",
    "    block_on(\"email\"),\n",
    "]\n",
    "\n",
    "\n",
    "cumulative_comparisons_to_be_scored_from_blocking_rules_chart(\n",
    "    table_or_tables=df,\n",
    "    blocking_rules=blocking_rules_for_analysis,\n",
    "    db_api=db_api,\n",
    "    link_type=\"dedupe_only\",\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here, the total number of records pairs considered by the model have been reduced from 3,664 to 2,213.\n",
    "\n",
    "!!! note \"Further Reading\"\n",
    "    :simple-readme: For a deeper dive on blocking, please refer to the [Blocking Topic Guides](../topic_guides/blocking/blocking_rules.md).\n",
    "\n",
    "    :material-tools: For more on the blocking tools in Splink, please refer to the [Blocking API documentation](../api_docs/blocking.md).\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Worked Example\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": [
     "hide_input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-1ad373afe74f4bbe96278bc2066d8962.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-1ad373afe74f4bbe96278bc2066d8962.vega-embed details,\n",
       "  #altair-viz-1ad373afe74f4bbe96278bc2066d8962.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-1ad373afe74f4bbe96278bc2066d8962\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-1ad373afe74f4bbe96278bc2066d8962\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-1ad373afe74f4bbe96278bc2066d8962\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-75d04db0f4c0716967a977404e6a2d09\"}, \"mark\": \"bar\", \"encoding\": {\"order\": {\"field\": \"cumulative_rows\"}, \"tooltip\": [{\"field\": \"blocking_rule\", \"title\": \"SQL Condition\", \"type\": \"nominal\"}, {\"field\": \"row_count\", \"format\": \",\", \"title\": \"Comparisons Generated\", \"type\": \"quantitative\"}, {\"field\": \"cumulative_rows\", \"format\": \",\", \"title\": \"Cumulative Comparisons\", \"type\": \"quantitative\"}, {\"field\": \"cartesian\", \"format\": \",\", \"title\": \"Total comparisons in Cartesian product\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"start\", \"title\": \"Comparisons Generated by Rule(s)\", \"type\": \"quantitative\"}, \"x2\": {\"field\": \"cumulative_rows\"}, \"y\": {\"field\": \"blocking_rule\", \"sort\": [\"-x2\"], \"title\": \"SQL Blocking Rule\"}}, \"height\": {\"step\": 20}, \"title\": {\"text\": \"Count of Additional Comparisons Generated by Each Blocking Rule\", \"subtitle\": \"(Counts exclude comparisons already generated by previous rules)\"}, \"width\": 450, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\", \"datasets\": {\"data-75d04db0f4c0716967a977404e6a2d09\": [{\"blocking_rule\": \"l.\\\"first_name\\\" = r.\\\"first_name\\\"\", \"row_count\": 1998, \"cumulative_rows\": 1998, \"cartesian\": 499500, \"match_key\": \"0\", \"start\": 0}, {\"blocking_rule\": \"l.\\\"surname\\\" = r.\\\"surname\\\"\", \"row_count\": 1351, \"cumulative_rows\": 3349, \"cartesian\": 499500, \"match_key\": \"1\", \"start\": 1998}, {\"blocking_rule\": \"l.\\\"email\\\" = r.\\\"email\\\"\", \"row_count\": 315, \"cumulative_rows\": 3664, \"cartesian\": 499500, \"match_key\": \"2\", \"start\": 3349}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from splink import DuckDBAPI, block_on, splink_datasets\n",
    "from splink.blocking_analysis import (\n",
    "    cumulative_comparisons_to_be_scored_from_blocking_rules_chart,\n",
    ")\n",
    "\n",
    "db_api = DuckDBAPI()\n",
    "\n",
    "df = splink_datasets.fake_1000\n",
    "\n",
    "blocking_rules_for_analysis = [\n",
    "    block_on(\"first_name\"),\n",
    "    block_on(\"surname\"),\n",
    "    block_on(\"email\"),\n",
    "]\n",
    "\n",
    "chart = cumulative_comparisons_to_be_scored_from_blocking_rules_chart(\n",
    "    table_or_tables=df,\n",
    "    blocking_rules=blocking_rules_for_analysis,\n",
    "    db_api=db_api,\n",
    "    link_type=\"dedupe_only\",\n",
    ")\n",
    "chart"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
